In [1]:
# Import libraries.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt 
%matplotlib inline

import plotly.graph_objects as go
from plotly.subplots import make_subplots

import plotly.express as px
import plotly.io as pio

import networkx as nx

import warnings
# Hide FutureWarning messages so they do not clutter the notebook output.
warnings.simplefilter(action = 'ignore', category = FutureWarning)

# NOTE(review): this second import is redundant (the module is already imported
# above). The blanket filter below ignores every warning category, so it also
# covers the FutureWarning filter above -- and it will hide deprecation notices
# raised by pandas / matplotlib / plotly while the notebook runs.
import warnings 
warnings.filterwarnings('ignore')
In [2]:
# Load the source CSV once and keep that raw frame untouched; all further work
# happens on an independent copy.
data_in1 = pd.read_csv(
    'Highest GDP Countries and Wages.csv',
    sep = ',',
)
data_in = data_in1.copy()

data_in
Out[2]:
Country Top Export Exports Imports Import / Export Ratio 2022 Population 2022 GDP 2022 GDP per capita Est. 2023 Population Est. 2023 GDP ... Est. 2024 GDP Est. 2024 GDP per capita Pop. Growth 2013-2022 Pop. Growth / Year Urban Pop Fert. Rate Med. Age Income Unemployment Area
0 Switzerland Gold 620424 356763 0.58 8775745 818426 93260 8857359 878442 ... 938458 105669 8.49 0.93 75 1.5 42 95490 4.30 15942
1 Norway Petroleum 321076 107268 0.33 5329047 579422 108729 5373810 553186 ... 526951 94660 7.43 0.84 86 1.5 40 94540 3.23 148449
2 Luxembourg Iron 163585 26068 0.16 652208 81530 125006 665839 85043 ... 88556 131384 20.20 2.09 88 1.4 39 89200 4.58 998
3 Ireland Blood 723121 147913 0.20 5120211 532415 103983 5176021 548217 ... 564020 106059 10.89 1.09 64 1.8 38 79730 4.48 27458
4 United States Petroleum 3011859 3375948 1.12 337273680 25744100 76330 339331049 27262591 ... 28781083 85373 4.41 0.61 83 1.7 38 76770 3.65 3809525
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
58 Pakistan Clothing 38700 71105 1.84 205661422 326796 1589 210494465 332516 ... 338237 1461 23.30 2.35 35 3.3 21 1560 5.60 307373
59 Nepal Soybean oil 2722 13716 5.04 29473448 39406 1337 29841866 41792 ... 44179 1397 13.49 1.25 22 2.0 24 1340 10.92 56827
60 Sudan Gold 5908 7448 1.26 33329401 36729 1102 34202631 31797 ... 26865 547 26.44 2.62 35 4.3 19 760 17.59 710689
61 Somalia Goats 1363 3519 2.58 17601351 10420 592 18182195 11612 ... 12804 776 34.71 3.30 46 6.1 15 600 19.29 246199
62 Afghanistan Gold 1476 4689 3.18 39814606 14174 356 40937377 14320 ... 14467 422 27.45 2.82 26 4.4 17 380 14.10 252072

63 rows × 22 columns

In [3]:
# Summarise the dataset: column names, non-null counts, dtypes and memory usage.
data_in.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63 entries, 0 to 62
Data columns (total 22 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Country                   63 non-null     object 
 1   Top Export                63 non-null     object 
 2   Exports                   63 non-null     int64  
 3   Imports                   63 non-null     int64  
 4   Import / Export Ratio     63 non-null     float64
 5   2022 Population           63 non-null     int64  
 6   2022 GDP                  63 non-null     int64  
 7   2022 GDP per capita       63 non-null     int64  
 8   Est. 2023 Population      63 non-null     int64  
 9   Est. 2023 GDP             63 non-null     int64  
 10  Est. 2023 GDP per capita  63 non-null     int64  
 11  Est. 2024 Population      63 non-null     int64  
 12  Est. 2024 GDP             63 non-null     int64  
 13  Est. 2024 GDP per capita  63 non-null     int64  
 14  Pop. Growth 2013-2022     63 non-null     float64
 15  Pop. Growth / Year        63 non-null     float64
 16  Urban Pop                 63 non-null     int64  
 17  Fert. Rate                63 non-null     float64
 18  Med. Age                  63 non-null     int64  
 19  Income                    63 non-null     int64  
 20  Unemployment              63 non-null     float64
 21  Area                      63 non-null     int64  
dtypes: float64(5), int64(15), object(2)
memory usage: 11.0+ KB
In [4]:
# Reduce the dataset to the columns used in the analysis and reposition them
# (GDP ahead of population).  The columns are picked by name rather than by
# position, so a change in the CSV's column order cannot silently select the
# wrong data -- a missing column raises a KeyError instead.
analysis_columns = [
    'Country',
    '2022 GDP',
    '2022 Population',
    'Pop. Growth / Year',
    'Urban Pop',
    'Fert. Rate',
    'Med. Age',
    'Income',
    'Unemployment',
    'Area',
]
df1 = data_in[analysis_columns].copy()

df1
Out[4]:
Country 2022 GDP 2022 Population Pop. Growth / Year Urban Pop Fert. Rate Med. Age Income Unemployment Area
0 Switzerland 818426 8775745 0.93 75 1.5 42 95490 4.30 15942
1 Norway 579422 5329047 0.84 86 1.5 40 94540 3.23 148449
2 Luxembourg 81530 652208 2.09 88 1.4 39 89200 4.58 998
3 Ireland 532415 5120211 1.09 64 1.8 38 79730 4.48 27458
4 United States 25744100 337273680 0.61 83 1.7 38 76770 3.65 3809525
... ... ... ... ... ... ... ... ... ... ...
58 Pakistan 326796 205661422 2.35 35 3.3 21 1560 5.60 307373
59 Nepal 39406 29473448 1.25 22 2.0 24 1340 10.92 56827
60 Sudan 36729 33329401 2.62 35 4.3 19 760 17.59 710689
61 Somalia 10420 17601351 3.30 46 6.1 15 600 19.29 246199
62 Afghanistan 14174 39814606 2.82 26 4.4 17 380 14.10 252072

63 rows × 10 columns

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df1.describe()
Out[5]:
2022 GDP 2022 Population Pop. Growth / Year Urban Pop Fert. Rate Med. Age Income Unemployment Area
count 6.300000e+01 6.300000e+01 63.000000 63.000000 63.000000 63.000000 63.000000 63.000000 6.300000e+01
mean 1.446446e+06 9.929541e+07 0.902857 67.539683 2.047619 34.650794 26517.460317 6.474921 5.797850e+05
std 3.919106e+06 2.519857e+08 1.081826 20.319882 0.967514 8.719941 27606.832719 4.871535 1.217321e+06
min 1.042000e+04 6.522080e+05 -1.200000 19.000000 1.200000 15.000000 380.000000 0.230000 9.980000e+02
25% 1.026310e+05 9.499294e+06 0.295000 54.000000 1.500000 28.500000 4055.000000 3.640000 3.399800e+04
50% 4.088020e+05 3.332940e+07 0.900000 71.000000 1.700000 37.000000 12750.000000 4.980000 1.306890e+05
75% 1.058088e+06 7.768432e+07 1.520000 84.500000 2.050000 41.500000 49200.000000 7.730000 4.044750e+05
max 2.574410e+07 1.437387e+09 3.410000 99.000000 6.100000 49.000000 95490.000000 28.840000 6.592812e+06
In [6]:
# Display the scatter plot / histogram matrix of every numeric column pair.
# The trailing semicolon keeps the PairGrid object's repr out of the cell output.
sns.pairplot(df1);
Out[6]:
<seaborn.axisgrid.PairGrid at 0x24971b34700>
In [7]:
# Countries with the oldest populations: median age of 43 or above.
age_high = df1[df1["Med. Age"].ge(43)].copy()

age_high
Out[7]:
Country 2022 GDP 2022 Population Pop. Growth / Year Urban Pop Fert. Rate Med. Age Income Unemployment Area
9 Austria 470302 9029509 0.70 59 1.5 43 55720 4.99 32388
11 Finland 282511 5553369 0.26 87 1.4 43 54890 6.72 130689
12 Germany 4076923 83684120 0.41 77 1.5 45 54030 3.14 138068
19 Japan 4232173 124413469 -0.20 94 1.3 49 42550 2.60 145934
20 Italy 2046952 58861053 -0.10 72 1.3 48 38200 8.07 116629
21 Spain 1415874 47712687 0.21 80 1.3 45 32090 12.92 195360
23 Portugal 254849 10395635 -0.10 67 1.4 46 25950 6.01 35608
24 Lithuania 70878 2827767 -0.53 71 1.6 44 23870 5.96 25207
25 Latvia 40876 1876767 -0.79 69 1.6 44 21850 6.81 24940
26 Greece 217285 10412852 -0.57 86 1.4 45 21810 12.43 50984
29 Bulgaria 90213 6455775 -1.20 78 1.6 45 13350 4.27 42614
35 Serbia 63563 6664185 -0.77 69 1.5 43 9290 8.68 29957
In [8]:
# Countries with the youngest populations: median age of 24 or below.
age_low = df1[df1["Med. Age"].le(24)].copy()

age_low
Out[8]:
Country 2022 GDP 2022 Population Pop. Growth / Year Urban Pop Fert. Rate Med. Age Income Unemployment Area
44 Iraq 264182 44497557 3.41 71 3.4 20 5270 15.32 167974
46 Egypt 409306 95298253 2.55 41 2.8 24 4100 6.40 384788
52 Bolivia 44008 12224444 1.65 69 2.5 24 3490 3.55 424162
55 Nigeria 475058 219629218 2.71 54 5.1 17 2160 3.83 356667
57 Cameroon 44341 28351023 2.65 58 4.3 18 1640 3.78 179942
58 Pakistan 326796 205661422 2.35 35 3.3 21 1560 5.60 307373
59 Nepal 39406 29473448 1.25 22 2.0 24 1340 10.92 56827
60 Sudan 36729 33329401 2.62 35 4.3 19 760 17.59 710689
61 Somalia 10420 17601351 3.30 46 6.1 15 600 19.29 246199
62 Afghanistan 14174 39814606 2.82 26 4.4 17 380 14.10 252072
In [9]:
# Each subplot is described once: grid position, source frame, x column,
# x-axis title, and where the median-age text sits relative to its marker.
panels = [
    (1, 1, age_high, 'Fert. Rate', 'Fertility Rate', 'bottom right'),
    (1, 2, age_high, 'Income',     'Income',         'top left'),
    (2, 1, age_low,  'Fert. Rate', 'Fertility Rate', 'bottom right'),
    (2, 2, age_low,  'Income',     'Income',         'top left'),
]

# Styling shared by all four subplots.
label_font = dict(color = 'black', weight = 'bold', size = 11)
axis_title_font = dict(size = 30, family = 'tekton pro', color = 'darkslateblue')
tick_font = dict(color = "darkorange")
# The opening <b> used as a tick prefix is intended to render the tick labels in bold.
tick_prefix = "<b>"

# Create the figure area.
fig1 = make_subplots(rows = 2, cols = 2)

# Add the subplots and label their axes.
for row, col, frame, x_col, x_title, text_pos in panels:
    # Sort the FRAME (not each column on its own) so that every plotted point
    # keeps the x value and the median age of the same country.  Sorting the
    # two columns independently would pair the n-th smallest x with the n-th
    # smallest age regardless of which country each value came from.
    # 'Med. Age' is the tie-breaker, which keeps the line order deterministic
    # when several countries share the same x value.
    ordered = frame.sort_values(by = [x_col, 'Med. Age'])

    fig1.add_trace(
        go.Scatter(x = ordered[x_col], y = ordered['Med. Age'], text = ordered['Med. Age'],
                   mode = 'lines+markers+text', textfont = label_font, textposition = text_pos),
        row = row, col = col
    )

    fig1.update_xaxes(title_text = f"<b>{x_title}</b>", title_font = axis_title_font,
                      tickfont = tick_font, tickprefix = tick_prefix, row = row, col = col)

    if col == 1:
        # Only the left-hand column carries the shared y-axis title.
        fig1.update_yaxes(title_text = "<b>Median Age</b>", title_font = axis_title_font,
                          tickfont = tick_font, tickprefix = tick_prefix, row = row, col = col)
    else:
        fig1.update_yaxes(tickfont = tick_font, tickprefix = tick_prefix, row = row, col = col)

# Per-axis settings carried over unchanged from the original styling.
fig1.update_xaxes(tickcolor = 'red', row = 1, col = 1)
fig1.update_yaxes(title_font_color = "red", row = 2, col = 2)

# Change size of figure and add title.
fig1.update_layout(height = 800, width = 1200,
                   title_text = "     <b>   Minimum / Maximum Median Age by Income and Fertility Rate</b>",
                   font_family = "Courier New", font_size = 22,
                   title_font_family = "Cursive", title_font_color = "crimson",
                   showlegend = False)

fig1.show()
In [10]:
# Switch matplotlib to the 'fivethirtyeight' style sheet.
# NOTE(review): this is a global setting, so it also applies to any matplotlib /
# seaborn cell that is run (or re-run) after this one -- consider moving it to
# the configuration at the top of the notebook.
plt.style.use('fivethirtyeight')

# Function to annotate with value labels and arced arrows.
def arrow_labels(ax, x, y, label, k):
    """Annotate data points with text labels joined to them by curved arrows.

    Every point gets two graph nodes: an anchor placed at the point's own
    coordinates and a label node that starts on top of it.  The anchors are
    passed to ``nx.spring_layout`` as fixed nodes, so only the label nodes are
    moved by the layout.  Each label is then drawn at its resulting position
    with an arrow pointing back to its data point.

    Parameters
    ----------
    ax : object providing ``annotate`` (a matplotlib Axes in this notebook)
        Axes that receives the annotations.
    x, y : iterable of numbers
        Coordinates of the points, in data units.
    label : iterable of str
        One label per point, parallel to ``x`` and ``y``.  Labels double as
        graph node names, so two points with the same label share one node;
        labels should therefore be unique.
    k : float
        Forwarded unchanged to ``nx.spring_layout`` as its ``k`` argument.

    Returns
    -------
    None
        The function only draws on ``ax``.  When there are no points it
        returns without touching ``ax`` or building a graph.
    """
    # Materialise the inputs once; this also accepts one-shot iterators.
    points = list(zip(x, y, label))
    if not points:
        # Nothing to annotate.
        return

    G = nx.DiGraph()
    data_nodes = []
    init_pos = {}
    # `text` (not `label`) is used as the loop variable so the `label`
    # parameter is not overwritten while it is being iterated.
    for xi, yi, text in points:
        data_str = f'data_{text}'
        G.add_node(data_str)
        G.add_node(text)
        G.add_edge(text, data_str)
        data_nodes.append(data_str)
        # Both nodes start at the data point; only the label node may move.
        init_pos[data_str] = (xi, yi)
        init_pos[text] = (xi, yi)

    pos = nx.spring_layout(G, pos = init_pos, fixed = data_nodes, k = k)

    # Each edge runs label -> anchor, i.e. the arrow points at the data point.
    for text, data_str in G.edges():
        ax.annotate(text,
                    xy = pos[data_str], xycoords = 'data',
                    xytext = pos[text], textcoords = 'data',
                    arrowprops = dict(arrowstyle = "->",
                                    shrinkA = -5, shrinkB = -17,
                                    connectionstyle = "arc3, rad = 0.5", 
                                    color = 'red'), fontsize = 20, color = 'blue') 
        
# Get the 10 countries with the highest unemployment.  All three columns are
# taken from the same sorted frame, so each row keeps one country's values.
x_vals0 = df1[['Country', 'Unemployment', 'Med. Age']].sort_values(by = 'Unemployment', ascending = False)
top10 = x_vals0.head(10)
x_vals1 = top10['Unemployment']
y_vals1 = top10['Med. Age']
lst1 = top10['Country'].tolist()

# Create the figure at its final size.
fig2, ax = plt.subplots(figsize = (25, 18))

# Specify plot parameters.
# The label is a single string: passing a list as the label of one line is
# deprecated since Matplotlib 3.9.  The country names are drawn by
# arrow_labels() below, not by a legend.
ax.plot(x_vals1, y_vals1, color = 'green', marker = '*', linestyle = 'dashed',
     linewidth = 2, markersize = 30, mfc = 'orange', mec = 'blue', label = 'Median Age')

# Set axis limits
ax.set_xlim(9, 31)
ax.set_ylim(13, 45.5)

# Add x and y axis labels.
ax.set_xlabel('Unemployment (%)', labelpad = 25, fontsize = 28, color = 'red' )
ax.set_ylabel('Median Age', labelpad = 25, fontsize = 28, color = 'red')

# Change the font size of xticks & yticks
ax.tick_params(axis = 'both', labelsize = 20)

# Add the title.  The returned Text object is deliberately NOT assigned back to
# fig2, so fig2 keeps referring to the Figure.
fig2.suptitle("Top 10 Countries by Unemployment Rate and Median Age", fontsize = 40, color = 'darkolivegreen')

# Plain Python lists of the point coordinates for the annotation helper.
x_lst1 = x_vals1.tolist()
y_lst1 = y_vals1.tolist()

arrow_labels(ax, x_lst1, y_lst1, lst1, k = .835)

# Save before showing, while the figure still holds the drawn content.
fig2.savefig('Top 10 Countries by Unemployment & Median Age.png')

plt.show()
In [ ]:
 
In [ ]: